From 2b99d6e3a9c86d4532faa510bcf2dbd3ab2c2c77 Mon Sep 17 00:00:00 2001 From: "emellor@leeni.uk.xensource.com" Date: Wed, 19 Oct 2005 11:47:51 +0100 Subject: [PATCH] Take advantage of the new UUID (handle) stored for us in Xen to improve the recreation semantics. Remove the unpause at the end of xc_linux_restore, and move it to XendDomainInfo. This is necessary because xenstored now allocates the domain path when the domain is introduced, which means that the new domain cannot start running until that introduce is performed and the new devices configured. Give restore a separate completion phase in which domain details are stored. This is required because the domain path is no longer available until after the introduceDomain call. TODO: Split the domain introduction into two so that the domain path is available earlier. At the moment, the domain <-> store channel details are passed in to xenstored when the domain is introduced, but in the case of restore it is necessary to wait until the restore is completed before the channel MFN is available. Change the interface between XendDomainInfo and XendCheckpoint/image to not have hideous callbacks through setConsoleRef and setStoreRef. Instead, image.createImage explicitly returns those values, and XendCheckpoint passes them through to completeRestore. Move the purging of the domain path corresponding to a new domain from Xend to xenstored, since xenstored is now in charge of this path. With the domain path creation moved to xenstored, Xend cannot remove the path, because watches may have fired on it already. Fix the printf statement in xenstored in verbose mode that details the messages being written. This statement was printing the buffer using %s, but this buffer has an explicit length field, so we were seeing garbage after the correct details. Signed-off-by: Ewan Mellor --- tools/libxc/xc_linux_restore.c | 10 -- tools/python/xen/xend/XendCheckpoint.py | 59 ++++---- tools/python/xen/xend/XendDomain.py | 4 +- tools/python/xen/xend/XendDomainInfo.py | 184 ++++++++++++------------ tools/python/xen/xend/image.py | 76 ++++------ tools/xenstore/xenstored_core.c | 49 +++++-- tools/xenstore/xenstored_core.h | 4 + tools/xenstore/xenstored_domain.c | 2 + 8 files changed, 198 insertions(+), 190 deletions(-) diff --git a/tools/libxc/xc_linux_restore.c b/tools/libxc/xc_linux_restore.c index 2e27db651d..e4eb81b0d6 100644 --- a/tools/libxc/xc_linux_restore.c +++ b/tools/libxc/xc_linux_restore.c @@ -642,16 +642,6 @@ int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, unsigned long nr_pf goto out; } - DPRINTF("Domain ready to be unpaused\n"); - op.cmd = DOM0_UNPAUSEDOMAIN; - op.u.unpausedomain.domain = (domid_t)dom; - rc = xc_dom0_op(xc_handle, &op); - if (rc == 0) { - /* Success: print the domain id. */ - DPRINTF("DOM=%u\n", dom); - return 0; - } - out: if ( (rc != 0) && (dom != 0) ) xc_domain_destroy(xc_handle, dom); diff --git a/tools/python/xen/xend/XendCheckpoint.py b/tools/python/xen/xend/XendCheckpoint.py index 25c496add0..828fd0f2eb 100644 --- a/tools/python/xen/xend/XendCheckpoint.py +++ b/tools/python/xen/xend/XendCheckpoint.py @@ -118,9 +118,11 @@ def restore(xd, fd): dominfo = xd.restore_(vmconfig) - assert dominfo.store_channel - assert dominfo.console_channel - assert dominfo.getDomainPath() + store_port = dominfo.getStorePort() + console_port = dominfo.getConsolePort() + + assert store_port + assert console_port try: l = read_exact(fd, sizeof_unsigned_long, @@ -130,32 +132,19 @@ def restore(xd, fd): raise XendError( "not a valid guest state file: pfn count out of range") - store_evtchn = dominfo.store_channel - console_evtchn = dominfo.console_channel - - cmd = [xen.util.auxbin.pathTo(XC_RESTORE), str(xc.handle()), str(fd), - str(dominfo.getDomid()), str(nr_pfns), - str(store_evtchn), str(console_evtchn)] + cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE), + xc.handle(), fd, dominfo.getDomid(), nr_pfns, + store_port, console_port]) log.debug("[xc_restore]: %s", string.join(cmd)) - def restoreInputHandler(line, _): - m = re.match(r"^(store-mfn) (\d+)$", line) - if m: - store_mfn = int(m.group(2)) - dominfo.setStoreRef(store_mfn) - log.debug("IntroduceDomain %d %d %d", - dominfo.getDomid(), - store_mfn, - dominfo.store_channel) - IntroduceDomain(dominfo.getDomid(), - store_mfn, - dominfo.store_channel) - else: - m = re.match(r"^(console-mfn) (\d+)$", line) - if m: - dominfo.setConsoleRef(int(m.group(2))) - - forkHelper(cmd, fd, restoreInputHandler, True) + handler = RestoreInputHandler() + + forkHelper(cmd, fd, handler.handler, True) + + if handler.store_mfn is None or handler.console_mfn is None: + raise XendError('Could not read store/console MFN') + + dominfo.completeRestore(handler.store_mfn, handler.console_mfn) return dominfo except: @@ -163,6 +152,22 @@ def restore(xd, fd): raise +class RestoreInputHandler: + def __init__(self): + self.store_mfn = None + self.console_mfn = None + + + def handler(self, line, _): + m = re.match(r"^(store-mfn) (\d+)$", line) + if m: + self.store_mfn = int(m.group(2)) + else: + m = re.match(r"^(console-mfn) (\d+)$", line) + if m: + self.console_mfn = int(m.group(2)) + + def forkHelper(cmd, fd, inputHandler, closeToChild): child = xPopen3(cmd, True, -1, [fd, xc.handle()]) diff --git a/tools/python/xen/xend/XendDomain.py b/tools/python/xen/xend/XendDomain.py index 3046c7eece..235e39f633 100644 --- a/tools/python/xen/xend/XendDomain.py +++ b/tools/python/xen/xend/XendDomain.py @@ -347,7 +347,7 @@ class XendDomain: dominfo = self.domain_lookup(domid) log.info("Domain %s (%d) unpaused.", dominfo.getName(), dominfo.getDomid()) - return xc.domain_unpause(dom=dominfo.getDomid()) + return dominfo.unpause() except Exception, ex: raise XendError(str(ex)) @@ -358,7 +358,7 @@ class XendDomain: dominfo = self.domain_lookup(domid) log.info("Domain %s (%d) paused.", dominfo.getName(), dominfo.getDomid()) - return xc.domain_pause(dom=dominfo.getDomid()) + return dominfo.pause() except Exception, ex: raise XendError(str(ex)) diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py index 614ae07f87..59bd0f24a6 100644 --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -145,8 +145,6 @@ def create(config): try: vm.construct() vm.initDomain() - vm.construct_image() - vm.configure() vm.storeVmDetails() vm.storeDomDetails() vm.refreshShutdown() @@ -167,6 +165,7 @@ def recreate(xeninfo, priv): assert not xeninfo['dying'] domid = xeninfo['dom'] + uuid1 = xeninfo['handle'] dompath = GetDomainPath(domid) if not dompath: raise XendError( @@ -176,44 +175,34 @@ def recreate(xeninfo, priv): if not vmpath: raise XendError( 'No vm path in store for existing domain %d' % domid) - uuid1_str = xstransact.Read(vmpath, "uuid") - if not uuid1_str: + uuid2_str = xstransact.Read(vmpath, "uuid") + if not uuid2_str: raise XendError( 'No vm/uuid path in store for existing domain %d' % domid) - uuid1 = uuid.fromString(uuid1_str) - - uuid2 = xeninfo['handle'] + uuid2 = uuid.fromString(uuid2_str) if uuid1 != uuid2: raise XendError( 'Uuid in store does not match uuid for existing domain %d: ' - '%s != %s' % (domid, uuid1_str, uuid.toString(uuid2))) + '%s != %s' % (domid, uuid2_str, uuid.toString(uuid1))) - log.info("Recreating domain %d, UUID %s.", domid, uuid1_str) + log.info("Recreating domain %d, UUID %s.", domid, uuid2_str) - vm = XendDomainInfo(uuid2, xeninfo, domid, dompath, True) + vm = XendDomainInfo(uuid1, xeninfo, domid, dompath, True) except Exception, exn: log.warn(str(exn)) - if priv: - new_uuid = [0 for i in range(0, 16)] - else: - new_uuid = uuid.create() - - log.info("Recreating domain %d with new UUID %s.", domid, - uuid.toString(new_uuid)) + log.info("Recreating domain %d with UUID %s.", domid, + uuid.toString(uuid1)) - vm = XendDomainInfo(new_uuid, xeninfo, domid, dompath, True) + vm = XendDomainInfo(uuid1, xeninfo, domid, dompath, True) vm.removeDom() + vm.removeVm() vm.storeVmDetails() vm.storeDomDetails() - if domid != 0: - # Setup store and console channels - vm.create_channels() - vm.refreshShutdown(xeninfo) return vm @@ -230,11 +219,8 @@ def restore(config): parseConfig(config)) try: vm.construct() - vm.configure() - vm.create_channels() vm.storeVmDetails() - vm.storeDomDetails() - vm.refreshShutdown() + vm.createChannels() return vm except: vm.destroy() @@ -379,9 +365,9 @@ class XendDomainInfo: self.image = None - self.store_channel = None + self.store_port = None self.store_mfn = None - self.console_channel = None + self.console_port = None self.console_mfn = None self.state = STATE_DOM_OK @@ -571,6 +557,18 @@ class XendDomainInfo: ## public: + def completeRestore(self, store_mfn, console_mfn): + + self.store_mfn = store_mfn + self.console_mfn = console_mfn + + self.introduceDomain() + self.create_devices() + self.storeDomDetails() + self.unpause() + self.refreshShutdown() + + def storeVmDetails(self): to_store = { 'uuid': uuid.toString(self.uuidbytes), @@ -604,6 +602,15 @@ class XendDomainInfo: if v: to_store[k] = str(v) + def f(n, v): + if v is not None: + to_store[n] = str(v) + + f('console/port', self.console_port) + f('console/ring-ref', self.console_mfn) + f('store/port', self.store_port) + f('store/ring-ref', self.store_mfn) + to_store.update(self.vcpuDomDetails()) log.debug("Storing domain details: %s", to_store) @@ -649,6 +656,16 @@ class XendDomainInfo: return self.dompath + def getStorePort(self): + """For use only by image.py and XendCheckpoint.py.""" + return self.store_port + + + def getConsolePort(self): + """For use only by image.py and XendCheckpoint.py""" + return self.console_port + + def getVCpuCount(self): return self.info['vcpus'] @@ -666,10 +683,6 @@ class XendDomainInfo: """Get this domain's target memory size, in KiB.""" return self.info['memory_KiB'] - def setStoreRef(self, ref): - self.store_mfn = ref - self.storeDom("store/ring-ref", ref) - def refreshShutdown(self, xeninfo = None): # If set at the end of this method, a restart is required, with the @@ -704,6 +717,11 @@ class XendDomainInfo: return elif xeninfo['crashed']: + if self.readDom('xend/shutdown_completed'): + # We've seen this shutdown already, but we are preserving + # the domain for debugging. Leave it alone. + return + log.warn('Domain has crashed: name=%s id=%d.', self.info['name'], self.domid) @@ -735,6 +753,11 @@ class XendDomainInfo: else: self.destroy() + elif self.dompath is None: + # We have yet to manage to call introduceDomain on this + # domain. This can happen if a restore is in progress, or has + # failed. Ignore this domain. + pass else: # Domain is alive. If we are shutting it down, then check # the timeout on that, and destroy it if necessary. @@ -802,11 +825,6 @@ class XendDomainInfo: ## public: - def setConsoleRef(self, ref): - self.console_mfn = ref - self.storeDom("console/ring-ref", ref) - - def setMemoryTarget(self, target): """Set the memory target of this domain. @param target In MiB. @@ -1034,15 +1052,20 @@ class XendDomainInfo: raise VmError('Creating domain failed: name=%s' % self.info['name']) - self.dompath = DOMROOT + str(self.domid) - - # Ensure that the domain entry is clean. This prevents a stale - # shutdown_start_time from killing the domain, for example. - self.removeDom() - # Set maximum number of vcpus in domain xc.domain_max_vcpus(self.domid, int(self.info['vcpus'])) + + def introduceDomain(self): + assert self.domid is not None + assert self.store_mfn is not None + assert self.store_port is not None + + IntroduceDomain(self.domid, self.store_mfn, self.store_port) + self.dompath = GetDomainPath(self.domid) + assert self.dompath + + def initDomain(self): log.debug('XendDomainInfo.initDomain: %s %s %s', self.domid, @@ -1061,27 +1084,27 @@ class XendDomainInfo: xc.domain_setcpuweight(self.domid, self.info['cpu_weight']) - # XXX Merge with configure_maxmem? m = self.image.getDomainMemory(self.info['memory_KiB']) - xc.domain_setmaxmem(self.domid, m) + xc.domain_setmaxmem(self.domid, maxmem_kb = m) xc.domain_memory_increase_reservation(self.domid, m, 0, 0) cpu = self.info['cpu'] if cpu is not None and cpu != -1: xc.domain_pincpu(self.domid, 0, 1 << cpu) - self.info['start_time'] = time.time() + self.createChannels() - log.debug('init_domain> Created domain=%d name=%s memory=%d', - self.domid, self.info['name'], self.info['memory_KiB']) + channel_details = self.image.createImage() + self.store_mfn = channel_details['store_mfn'] + if 'console_mfn' in channel_details: + self.console_mfn = channel_details['console_mfn'] - def construct_image(self): - """Construct the boot image for the domain. - """ - self.create_channels() - self.image.createImage() - IntroduceDomain(self.domid, self.store_mfn, self.store_channel) + self.introduceDomain() + + self.create_devices() + + self.info['start_time'] = time.time() ## public: @@ -1166,33 +1189,22 @@ class XendDomainInfo: break - def eventChannel(self, path=None): - """Create an event channel to the domain. - - @param path under which port is stored in db + def createChannels(self): + """Create the channels to the domain. """ - if path: - try: - return int(self.readDom(path)) - except: - # The port is not yet set, i.e. the channel has not yet been - # created. - pass + self.store_port = self.createChannel() + self.console_port = self.createChannel() + + def createChannel(self): + """Create an event channel to the domain. + """ try: - port = xc.evtchn_alloc_unbound(dom=self.domid, remote_dom=0) + return xc.evtchn_alloc_unbound(dom=self.domid, remote_dom=0) except: log.exception("Exception in alloc_unbound(%d)", self.domid) raise - self.storeDom(path, port) - return port - - def create_channels(self): - """Create the channels to the domain. - """ - self.store_channel = self.eventChannel("store/port") - self.console_channel = self.eventChannel("console/port") def create_configured_devices(self): for (n, c) in self.info['device']: @@ -1231,6 +1243,14 @@ class XendDomainInfo: self.reconfigureDevice(deviceClass, devid, dev_config) + def pause(self): + xc.domain_pause(self.domid) + + + def unpause(self): + xc.domain_unpause(self.domid) + + ## private: def restart_check(self): @@ -1280,7 +1300,7 @@ class XendDomainInfo: xd = get_component('xen.xend.XendDomain') new_dom = xd.domain_create(config) try: - xc.domain_unpause(new_dom.getDomid()) + new_dom.unpause() except: new_dom.destroy() raise @@ -1355,20 +1375,6 @@ class XendDomainInfo: self.config = sxp.merge(['vm', ['image', blcfg]], self.config) - def configure(self): - """Configure a vm. - - """ - self.configure_maxmem() - self.create_devices() - - - def configure_maxmem(self): - if self.image: - m = self.image.getDomainMemory(self.info['memory_KiB']) - xc.domain_setmaxmem(self.domid, maxmem_kb = m) - - def send_sysrq(self, key): asserts.isCharConvertible(key) diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py index 5453b1089a..4a9d9fc740 100644 --- a/tools/python/xen/xend/image.py +++ b/tools/python/xen/xend/image.py @@ -111,7 +111,8 @@ class ImageHandler: """Entry point to create domain memory image. Override in subclass if needed. """ - self.createDomain() + return self.createDomain() + def createDomain(self): """Build the domain boot image. @@ -128,10 +129,15 @@ class ImageHandler: log.info("buildDomain os=%s dom=%d vcpus=%d", self.ostype, self.vm.getDomid(), self.vm.getVCpuCount()) - err = self.buildDomain() - if err != 0: - raise VmError('Building domain failed: ostype=%s dom=%d err=%d' - % (self.ostype, self.vm.getDomid(), err)) + + result = self.buildDomain() + + if isinstance(result, dict): + return result + else: + raise VmError('Building domain failed: ostype=%s dom=%d err=%s' + % (self.ostype, self.vm.getDomid(), str(result))) + def getDomainMemory(self, mem): """@return The memory required, in KiB, by the domain to store the @@ -151,26 +157,14 @@ class ImageHandler: """Extra cleanup on domain destroy (define in subclass if needed).""" pass - def set_vminfo(self, d): - if d.has_key('store_mfn'): - self.vm.setStoreRef(d.get('store_mfn')) - if d.has_key('console_mfn'): - self.vm.setConsoleRef(d.get('console_mfn')) - class LinuxImageHandler(ImageHandler): ostype = "linux" def buildDomain(self): - if self.vm.store_channel: - store_evtchn = self.vm.store_channel - else: - store_evtchn = 0 - if self.vm.console_channel: - console_evtchn = self.vm.console_channel - else: - console_evtchn = 0 + store_evtchn = self.vm.getStorePort() + console_evtchn = self.vm.getConsolePort() log.debug("dom = %d", self.vm.getDomid()) log.debug("image = %s", self.kernel) @@ -180,16 +174,12 @@ class LinuxImageHandler(ImageHandler): log.debug("ramdisk = %s", self.ramdisk) log.debug("vcpus = %d", self.vm.getVCpuCount()) - ret = xc.linux_build(dom = self.vm.getDomid(), - image = self.kernel, - store_evtchn = store_evtchn, - console_evtchn = console_evtchn, - cmdline = self.cmdline, - ramdisk = self.ramdisk) - if isinstance(ret, dict): - self.set_vminfo(ret) - return 0 - return ret + return xc.linux_build(dom = self.vm.getDomid(), + image = self.kernel, + store_evtchn = store_evtchn, + console_evtchn = console_evtchn, + cmdline = self.cmdline, + ramdisk = self.ramdisk) class VmxImageHandler(ImageHandler): @@ -214,20 +204,13 @@ class VmxImageHandler(ImageHandler): self.dmargs += self.configVNC(imageConfig) - def createImage(self): - """Create a VM for the VMX environment. - """ - self.createDomain() - def buildDomain(self): # Create an event channel self.device_channel = xc.evtchn_alloc_unbound(dom=self.vm.getDomid(), remote_dom=0) log.info("VMX device model port: %d", self.device_channel) - if self.vm.store_channel: - store_evtchn = self.vm.store_channel - else: - store_evtchn = 0 + + store_evtchn = self.vm.getStorePort() log.debug("dom = %d", self.vm.getDomid()) log.debug("image = %s", self.kernel) @@ -236,16 +219,13 @@ class VmxImageHandler(ImageHandler): log.debug("memsize = %d", self.vm.getMemoryTarget() / 1024) log.debug("vcpus = %d", self.vm.getVCpuCount()) - ret = xc.vmx_build(dom = self.vm.getDomid(), - image = self.kernel, - control_evtchn = self.device_channel, - store_evtchn = store_evtchn, - memsize = self.vm.getMemoryTarget() / 1024, - vcpus = self.vm.getVCpuCount()) - if isinstance(ret, dict): - self.set_vminfo(ret) - return 0 - return ret + return xc.vmx_build(dom = self.vm.getDomid(), + image = self.kernel, + control_evtchn = self.device_channel, + store_evtchn = store_evtchn, + memsize = self.vm.getMemoryTarget() / 1024, + vcpus = self.vm.getVCpuCount()) + # Return a list of cmd line args to the device models based on the # xm config file diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c index a66f7d8a48..d9ecefc133 100644 --- a/tools/xenstore/xenstored_core.c +++ b/tools/xenstore/xenstored_core.c @@ -246,8 +246,9 @@ static bool write_messages(struct connection *conn) if (out->inhdr) { if (verbose) - xprintf("Writing msg %s (%s) out to %p\n", + xprintf("Writing msg %s (%.*s) out to %p\n", sockmsg_string(out->hdr.msg.type), + out->hdr.msg.len, out->buffer, conn); ret = conn->write(conn, out->hdr.raw + out->used, sizeof(out->hdr) - out->used); @@ -946,9 +947,29 @@ static bool delete_child(struct connection *conn, corrupt(conn, "Can't find child '%s' in %s", childname, node->name); } + +static int _rm(struct connection *conn, struct node *node, const char *name) +{ + /* Delete from parent first, then if something explodes fsck cleans. */ + struct node *parent = read_node(conn, get_parent(name)); + if (!parent) { + send_error(conn, EINVAL); + return 0; + } + + if (!delete_child(conn, parent, basename(name))) { + send_error(conn, EINVAL); + return 0; + } + + delete_node(conn, node); + return 1; +} + + static void do_rm(struct connection *conn, const char *name) { - struct node *node, *parent; + struct node *node; name = canonicalize(conn, name); node = get_node(conn, name, XS_PERM_WRITE); @@ -972,24 +993,24 @@ static void do_rm(struct connection *conn, const char *name) return; } - /* Delete from parent first, then if something explodes fsck cleans. */ - parent = read_node(conn, get_parent(name)); - if (!parent) { - send_error(conn, EINVAL); - return; + if (_rm(conn, node, name)) { + add_change_node(conn->transaction, name, true); + fire_watches(conn, name, true); + send_ack(conn, XS_RM); } +} - if (!delete_child(conn, parent, basename(name))) { - send_error(conn, EINVAL); + +void internal_rm(const char *name) +{ + struct node *node = read_node(NULL, name); + if (!node) { return; } - - delete_node(conn, node); - add_change_node(conn->transaction, name, true); - fire_watches(conn, name, true); - send_ack(conn, XS_RM); + _rm(NULL, node, name); } + static void do_get_perms(struct connection *conn, const char *name) { struct node *node; diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h index e8f4a28c65..abcb42c38d 100644 --- a/tools/xenstore/xenstored_core.h +++ b/tools/xenstore/xenstored_core.h @@ -154,6 +154,10 @@ void __attribute__((noreturn)) corrupt(struct connection *conn, struct connection *new_connection(connwritefn_t *write, connreadfn_t *read); + +void internal_rm(const char *name); + + /* Is this a valid node name? */ bool is_valid_nodename(const char *node); diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c index 9ddcf4f728..fd2bbdf691 100644 --- a/tools/xenstore/xenstored_domain.c +++ b/tools/xenstore/xenstored_domain.c @@ -271,6 +271,8 @@ static struct domain *new_domain(void *context, unsigned int domid, list_add(&domain->list, &domains); talloc_set_destructor(domain, destroy_domain); + internal_rm(domain->path); + /* Tell kernel we're interested in this event. */ bind.remote_domain = domid; bind.remote_port = port; -- 2.30.2